{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "“austen-emma.txt”的文本长度为887071,词汇数量为192427,句子数量为887071\n",
      "“austen-persuasion.txt”的文本长度为466292,词汇数量为98171,句子数量为466292\n",
      "“austen-sense.txt”的文本长度为673022,词汇数量为141576,句子数量为673022\n",
      "“bible-kjv.txt”的文本长度为4332554,词汇数量为1010654,句子数量为4332554\n",
      "“blake-poems.txt”的文本长度为38153,词汇数量为8354,句子数量为38153\n",
      "“bryant-stories.txt”的文本长度为249439,词汇数量为55563,句子数量为249439\n",
      "“burgess-busterbrown.txt”的文本长度为84663,词汇数量为18963,句子数量为84663\n",
      "“carroll-alice.txt”的文本长度为144395,词汇数量为34110,句子数量为144395\n",
      "“chesterton-ball.txt”的文本长度为457450,词汇数量为96996,句子数量为457450\n",
      "“chesterton-brown.txt”的文本长度为406629,词汇数量为86063,句子数量为406629\n",
      "“chesterton-thursday.txt”的文本长度为320525,词汇数量为69213,句子数量为320525\n",
      "“edgeworth-parents.txt”的文本长度为935158,词汇数量为210663,句子数量为935158\n",
      "“melville-moby_dick.txt”的文本长度为1242990,词汇数量为260819,句子数量为1242990\n",
      "“milton-paradise.txt”的文本长度为468220,词汇数量为96825,句子数量为468220\n",
      "“shakespeare-caesar.txt”的文本长度为112310,词汇数量为25833,句子数量为112310\n",
      "“shakespeare-hamlet.txt”的文本长度为162881,词汇数量为37360,句子数量为162881\n",
      "“shakespeare-macbeth.txt”的文本长度为100351,词汇数量为23140,句子数量为100351\n",
      "“whitman-leaves.txt”的文本长度为711215,词汇数量为154883,句子数量为711215\n"
     ]
    }
   ],
   "source": [
    "from nltk.corpus import gutenberg  \n",
    "for fileid in gutenberg.fileids():\n",
    "    raw=gutenberg.raw(fileid)     #获取每个文件的原始内容\n",
    "    num_length=len(raw)           #统计文本长度\n",
    "    words=gutenberg.words(fileid) #获取每个文件中的词汇\n",
    "    num_words=len(words)          #统计词汇的数量\n",
    "    sents=gutenberg.raw(fileid)   #获取每个文件中的句子\n",
    "    num_sents=len(sents)          #统计句子的数量\n",
    "    print(\"“%s”的文本长度为%d,词汇数量为%d,句子数量为%d\"%(fileid,num_length,num_words,num_sents))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
